1 Setup

Set the working dir

setwd("/mnt/picea/projects/spruce/onilsson/cone-development/")
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
##     grep, grepl, intersect, is.unsorted, lapply, lengths, Map,
##     mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
##     setdiff, sort, table, tapply, union, unique, unlist, unsplit
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: Rcpp
## Loading required package: RcppArmadillo
library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
library(RColorBrewer)
source("~/Git/UPSCb/src/R/volcanoPlot.R")
## Loading required package: limma
## 
## Attaching package: 'limma'
## The following object is masked from 'package:DESeq2':
## 
##     plotMA
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
source("~/Git/UPSCb/src/R/plotMA.R")

pal <- brewer.pal(8,"Dark2")

2 Differential expression

Read the sample information

samples <- read.csv("~/Git/UPSCb/projects/spruce-cone-development/doc/samples.csv",
                    row.names=1,as.is=TRUE)

Load the count table

count.table <- read.csv("analysis/HTSeq/raw-unormalised_tech-rep-combined_data.csv",
                        row.names=1,as.is=TRUE)

Block date effect

dds <- DESeqDataSetFromMatrix(
  countData = count.table,
  colData = data.frame(condition=colnames(count.table),
                       sex=samples$Sex,
                       date=samples$Sampling,
                       rep=samples$Sample),
  design = ~date+sex)


dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
plotDispEsts(dds)

Compare male with female (date blocked)

res <- results(dds,contrast=c("sex","M","F"))
alpha=0.01
BiocGenerics::plotMA(res,alpha=alpha)

volcanoPlot(res,alpha=1e-6)
## Loading required package: LSD

hist(res$padj,breaks=seq(0,1,.01))

Number of DE genes, cutoff 1e-6

sum(res$padj<1e-6,na.rm=TRUE)
## [1] 3729

Number of down- and upregulated genes

table(sign(res$log2FoldChange[res$padj<1e-6]))
## 
##   -1    1 
## 2153 1576

Compare different cutoffs for padj and log2FoldChange

plot(density(res$log2FoldChange[res$padj<1e-6 & !is.na(res$padj)]))
abline(v=c(-1,1))

plot(density(res$log2FoldChange[res$padj<1e-3 & !is.na(res$padj)]))
abline(v=c(-1,1))

plot(density(res$log2FoldChange[res$padj<1e-3 & !is.na(res$padj) & abs(res$log2FoldChange) > 1]))

hist(res$log2FoldChange[res$padj<1e-3 & !is.na(res$padj) & abs(res$log2FoldChange) > 1])

hist(res$log2FoldChange[res$padj<1e-3 & !is.na(res$padj) & abs(res$log2FoldChange) > 1],breaks=seq(-10,10,0.5))
abline(v=c(-2,2))

hist(res$log2FoldChange[res$padj<1e-3 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],breaks=seq(-10,10,0.5))

hist(res$log2FoldChange[res$padj<1e-6 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],breaks=seq(-10,10,0.5))

hist(res$log2FoldChange[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],breaks=seq(-10,10,0.5))

Number of DE genes, padj cutoff 1e-2 + log2FoldChange cutoff 1.5

sum(res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5)
## [1] 2432
sex.effect.date.blocked <- rownames(res[!is.na(res$padj) & res$padj < alpha,])

Write files List with P-value and fold change per gene

write.csv(res,file="analysis/DESeq2/differential-expression_M-vs-F_date-blocked.csv")
write.csv2(res,file="analysis/DESeq2/differential-expression_M-vs-F_date-blocked_semi-colon.csv")

List of DE genes, padj cutoff 1e-2 + log2FoldChange cutoff 1.5

write(rownames(res)[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],
      file="analysis/DESeq2/differential-expression_M-vs-F_date-blocked_gene-list.txt")

# to change the design e.g for sex
# design(dds) <- ~sex

F vs V date blocked

# dds <- DESeqDataSetFromMatrix(
#   countData = count.table,
#   colData = data.frame(condition=colnames(count.table),
#                        sex=samples$Sex,
#                        date=samples$Sampling,
#                        rep=samples$Sample),
#   design = ~date+sex)
# dds <- DESeq(dds)
# plotDispEsts(dds)
resultsNames(dds)
## [1] "Intercept" "date09.12" "date10.08" "sexF"      "sexM"      "sexV"
res <- results(dds,contrast=c("sex","F","V"))

Compare different cutoffs

alpha=0.01
BiocGenerics::plotMA(res,alpha=alpha)

volcanoPlot(res,alpha=alpha)

volcanoPlot(res,alpha=0.001)

volcanoPlot(res,alpha=1e-6)

hist(res$padj,breaks=seq(0,1,.01))

sum(res$padj<1e-6,na.rm=TRUE)
## [1] 613
sum(res$padj<1e-3,na.rm=TRUE)
## [1] 1756
table(sign(res$log2FoldChange[res$padj<1e-6]))
## 
##  -1   1 
## 206 407
table(sign(res$log2FoldChange[res$padj<1e-3]))
## 
##   -1    1 
##  684 1072
plot(density(res$log2FoldChange[res$padj<1e-3 & !is.na(res$padj)]))
abline(v=c(-1,1))

sum(res$padj<1e-2,na.rm=TRUE)
## [1] 3053
table(sign(res$log2FoldChange[res$padj<1e-2]))
## 
##   -1    1 
## 1315 1738

Number of DE genes padj cutoff 1e-2 and log2FoldChange cutoff 1.5

sum(res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5)
## [1] 964

Write files

write.csv2(res,file="analysis/DESeq2/differential-expression_F-vs-V_date-blocked_semi-colon.csv")
write(rownames(res)[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],
      file="analysis/DESeq2/differential-expression_F-vs-V_date-blocked_gene-list.txt")
FvsV.effect.date.blocked <- rownames(res[!is.na(res$padj) & res$padj < alpha,])

Compare dates

res <- results(dds,contrast=c("date","10-08","09-12"))
BiocGenerics::plotMA(res,alpha=alpha)

volcanoPlot(res,alpha=alpha)

date.effect.sex.blocked <- rownames(res[!is.na(res$padj) & res$padj < alpha,])

Number of DE genes, padj cutoff 1e-2 + log2FoldChange cutoff 1.5

sum(res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5)
## [1] 138

Write files

write.csv2(res,file="analysis/DESeq2/differential-expression_10-08-vs-09-12_sex-blocked_semi-colon.csv")
write(rownames(res)[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],
      file="analysis/DESeq2/differential-expression_10-08-vs-09-12_sex-blocked_gene-list.txt")

The complete model We include the genotype.

dds <- DESeqDataSetFromMatrix(
  countData = count.table,
  colData = data.frame(condition=colnames(count.table),
                       sex=samples$Sex,
                       date=samples$Sampling,
                       rep=samples$Sample,
                       genotype=ifelse(samples$Sample %in% c(1,2,4),"G1","G2")),
  design = ~sex+genotype+date)
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing

Compare dates again (sex and genotype blocked) Contrast is 10-08 vs 09-12, so expression is (log2) 10-08 - 09-12 (linear 10-08/09-12). Positive logFC indicate higher expression in 10-08.

res <- results(dds,contrast=c("date","10-08","09-12"))
BiocGenerics::plotMA(res,alpha=alpha)

volcanoPlot(res,alpha=alpha)

hist(res$log2FoldChange[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],breaks=seq(-10,10,0.5))

hist(res$log2FoldChange[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1],breaks=seq(-10,10,0.5))

date.effect.genotype.sex.blocked <- rownames(res[!is.na(res$padj) & res$padj < alpha,])

Number of DE genes, padj cutoff 1e-2 + log2FoldChange cutoff 1.5

sum(res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5)
## [1] 175

Number of DE genes, padj cutoff 1e-2 + log2FoldChange cutoff 1

sum(res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1)
## [1] 871

Venn diagram to compare the lists of date DE genes with and without genotype blocked

plot.new()
grid.draw(venn.diagram(list(date.effect.sex.blocked,date.effect.genotype.sex.blocked),
  filename=NULL,
  category.names=c("with G","blocked G"),
  col=pal[1:2]
))

Write files

write.csv2(res,file="analysis/DESeq2/differential-expression_10-08-vs-09-12_sex_and_genotype-blocked_semi-colon.csv")
write(rownames(res)[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],
      file="analysis/DESeq2/differential-expression_10-08-vs-09-12_sex_and_genotype-blocked_gene-list.txt")

Compare male with female again (date and genotype blocked)

res <- results(dds,contrast=c("sex","M","F"))
BiocGenerics::plotMA(res,alpha=alpha)

volcanoPlot(res,alpha=alpha)

sex.effect.genotype.date.blocked <- rownames(res[!is.na(res$padj) & res$padj < alpha,])
sex.effect.genotype.date.blocked.2 <-rownames(res[!is.na(res$padj) & res$padj < 1e-2 & abs(res$log2FoldChange) >= 1.5,])

Venn diagram to compare the lists of date DE genes with and without genotype blocked

plot.new()
grid.draw(venn.diagram(list(sex.effect.date.blocked,sex.effect.genotype.date.blocked),
                       filename=NULL,
                       category.names=c("with G","blocked G"),
                       col=pal[1:2]
))

Number of DE genes, padj cutoff 1e-2 + log2FoldChange cutoff 1.5

sum(res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5)
## [1] 2587

Write files

write.csv2(res,file="analysis/DESeq2/differential-expression_M-vs-F_date_and_genotype-blocked_semi-colon.csv")
write(rownames(res)[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],
      file="analysis/DESeq2/differential-expression_M-vs-F_date_and_genotype-blocked_gene-list.txt")

Compare female vs veg again (date and genotype blocked)

res <- results(dds,contrast=c("sex","F","V"))
BiocGenerics::plotMA(res,alpha=alpha)

volcanoPlot(res,alpha=alpha)

Number of DE genes, padj cutoff 1e-2 + log2FoldChange cutoff 1.5

sum(res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5)
## [1] 1067
FvsV.effect.genotype.date.blocked <- rownames(res[!is.na(res$padj) & res$padj < alpha,])
FvsV.effect.genotype.date.blocked.2 <-rownames(res[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5,])

Venn diagram to compare the lists of DE genes with and without genotype blocked

plot.new()
grid.draw(venn.diagram(list(FvsV.effect.date.blocked,FvsV.effect.genotype.date.blocked),
                       filename=NULL,
                       category.names=c("with G","blocked G"),
                       col=pal[1:2]))

Write files

write.csv2(res,file="analysis/DESeq2/differential-expression_F-vs-V_date_and_genotype-blocked_semi-colon.csv")
write(rownames(res)[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],
  file="analysis/DESeq2/differential-expression_F-vs-V_date_and_genotype-blocked_gene-list.txt")                      

Compare male vs veg (date and genotype blocked)

res <- results(dds,contrast=c("sex","M","V"))
BiocGenerics::plotMA(res,alpha=alpha)

volcanoPlot(res,alpha=alpha)

Number of DE genes, padj cutoff 1e-2 + log2FoldChange cutoff 1.5

sum(res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5)
## [1] 2771
MvsV.effect.genotype.date.blocked <- rownames(res[!is.na(res$padj) & res$padj < alpha,])
MvsV.effect.genotype.date.blocked.2 <-rownames(res[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5,])

Write files

write.csv2(res,file="analysis/DESeq2/differential-expression_M-vs-V_date_and_genotype-blocked_semi-colon.csv")
write(rownames(res)[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],
      file="analysis/DESeq2/differential-expression_M-vs-V_date_and_genotype-blocked_gene-list.txt")

Compare genotype G1 vs G2 (date and sex blocked)

res <- results(dds,contrast=c("genotype","G1","G2"))
BiocGenerics::plotMA(res,alpha=alpha)

volcanoPlot(res,alpha=alpha)

Number of DE genes, padj cutoff 1e-2 + log2FoldChange cutoff 1.5

sum(res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5)
## [1] 749

Write files

write.csv2(res,file="analysis/DESeq2/differential-expression_G1-vs-G2_date_and_sex-blocked_semi-colon.csv")
write(rownames(res)[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5],
      file="analysis/DESeq2/differential-expression_G1-vs-G2_date_and_sex-blocked_gene-list.txt")

Venn diagram to compare the lists of DE genes comparing M vs F, F vs V and M vs V

plot.new()
grid.draw(venn.diagram(list(sex.effect.genotype.date.blocked,FvsV.effect.genotype.date.blocked,MvsV.effect.genotype.date.blocked),
                       filename=NULL,
                       category.names=c("M vs F","F vs V","M vs V"),
                       col=pal[1:3]))

plot.new()
grid.draw(venn.diagram(list(sex.effect.genotype.date.blocked.2,FvsV.effect.genotype.date.blocked.2,MvsV.effect.genotype.date.blocked.2),
                       filename=NULL,
                       category.names=c("M vs F","F vs V","M vs V"),
                       col=pal[1:3]))

List of DE genes common between the three comparisons

common.MvsF.FvsV.MvsV.gene.list <- intersect(sex.effect.genotype.date.blocked.2,intersect(FvsV.effect.genotype.date.blocked.2,MvsV.effect.genotype.date.blocked.2))
write(common.MvsF.FvsV.MvsV.gene.list,
      file="analysis/DESeq2/common_MvsF_FvsV_MvsV_date_and_genotype-blocked_gene-list.txt")

List of DE genes present only in MvsF comparison

MvsF.minus.FvsV.gene.list <- setdiff(sex.effect.genotype.date.blocked.2,FvsV.effect.genotype.date.blocked.2)
MvsF.only.gene.list <- setdiff(MvsF.minus.FvsV.gene.list,MvsV.effect.genotype.date.blocked.2)

List of DE genes common to MvsF and MvsV but not present in FvsV

common.MvsF.MvsV.gene.list <- intersect(sex.effect.genotype.date.blocked.2,MvsV.effect.genotype.date.blocked.2)
common.MvsF.MvsV.minus.FvsV.gene.list <-setdiff(common.MvsF.MvsV.gene.list,FvsV.effect.genotype.date.blocked.2)
write(common.MvsF.MvsV.minus.FvsV.gene.list,
file="analysis/DESeq2/common_MvsF_MvsV_minus_FvsV_date_and_genotype-blocked_gene-list.txt")

List of DE genes common to MvsF and FvsV but not present in MvsV

common.MvsF.FvsV.gene.list <- intersect(sex.effect.genotype.date.blocked.2,FvsV.effect.genotype.date.blocked.2)
common.MvsF.FvsV.minus.MvsV.gene.list <-setdiff(common.MvsF.FvsV.gene.list,MvsV.effect.genotype.date.blocked.2)
write(common.MvsF.FvsV.minus.MvsV.gene.list,
      file="analysis/DESeq2/common_MvsF_FvsV_minus_MvsV_date_and_genotype-blocked_gene-list.txt")

List of DE genes common to FvsV and MvsV but not present in MvsF

common.FvsV.MvsV.gene.list <- intersect(FvsV.effect.genotype.date.blocked.2,MvsV.effect.genotype.date.blocked.2)
common.FvsV.MvsV.minus.MvsF.gene.list <-setdiff(common.FvsV.MvsV.gene.list,sex.effect.genotype.date.blocked.2)
write(common.FvsV.MvsV.minus.MvsF.gene.list,
      file="analysis/DESeq2/common_FvsV_MvsV_minus_MvsF_date_and_genotype-blocked_gene-list.txt")

To compare the dates in female samples separately

 dds <- DESeqDataSetFromMatrix(
countData = count.table,
colData = data.frame(condition=colnames(count.table),
                     sex=samples$Sex,
                     date=samples$Sampling,
                     rep=samples$Sample,
                     genotype=ifelse(samples$Sample %in% c(1,2,4),"G1","G2"),
                     Fdate=ifelse(samples$Sex %in% c("F"),samples$Sampling,"00-00")),
design = ~genotype+Fdate)
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1026 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
res <- results(dds,contrast=c("Fdate","10-08","09-12"))
BiocGenerics::plotMA(res,alpha=alpha)

volcanoPlot(res,alpha=alpha)

Number of DE genes, padj cutoff 1e-2 + log2FoldChange cutoff 1.5

sum(res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5)
## [1] 106

Write files

write.csv2(res,file="analysis/DESeq2/differential-expression_10-08vs09-12_female_only_genotype-blocked_s
 emi-colon.csv")
write(rownames(res)[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >=1.5],file="analysis/DESeq2/differential-expression_10-08vs09-12_female_only_genotype-blocked_gene-list.txt")

To compare the dates in male samples separately

dds <- DESeqDataSetFromMatrix(
  countData = count.table,
  colData = data.frame(condition=colnames(count.table),
                       sex=samples$Sex,
                       date=samples$Sampling,
                       rep=samples$Sample,
                       genotype=ifelse(samples$Sample %in% c(1,2,4),"G1","G2"),
                       Mdate=ifelse(samples$Sex %in% c("M"),samples$Sampling,"00-00")),
  design = ~genotype+Mdate)
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 1 rows did not converge in beta, labelled in mcols(object)$betaConv. Use larger maxit argument with nbinomWaldTest
## -- replacing outliers and refitting for 1269 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
res <- results(dds,contrast=c("Mdate","10-08","09-12"))
BiocGenerics::plotMA(res,alpha=alpha)

volcanoPlot(res,alpha=alpha)

Number of DE genes, padj cutoff 1e-2 + log2FoldChange cutoff 1.5

sum(res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1.5)
## [1] 231

Write files

write.csv2(res,file="analysis/DESeq2/differential-expression_10-08vs09-12_male_only_genotype-blocked_s
 emi-colon.csv")
write(rownames(res)[res$padj<1e-2 & !is.na(res$padj) & abs(res$log2FoldChange) >=1.5],file="analysis/DESeq2/differential-expression_10-08vs09-12_male_only_genotype-blocked_gene-list.txt")